import os
import sys
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scanpy as sc
import loompy
import seaborn as sb
import cellrank as cr
from cellrank.tl.estimators import GPCCA
from cellrank.tl.kernels import VelocityKernel
import scvelo as scv
scv.__version__
%matplotlib inline
wgenes=pd.read_csv('/Users/menghan/Documents/projects/genome_ref/ensembl97_wgenes.txt',sep='\t')
adata=sc.read_loom("/Users/menghan/Documents/projects/skeletonConvergence/results/reslt_scRNA/pseudotime/scrnaSub_nasal.loom")
ldata = scv.read("/Users/menghan/Documents/projects/skeletonConvergence/results/reslt_scRNA/pseudotime/velocity_9samples.loom", cache=True)
#remove W chr genes
wolp=adata.var_names.intersection(wgenes['ensembl_gene_id'])
print(wolp)
w_gene_indicator = np.in1d(adata.var_names, wgenes['ensembl_gene_id'])
adata = adata[:, ~w_gene_indicator]
adata_loom = ldata[:, adata.var_names]
adata = scv.utils.merge(adata, adata_loom); del ldata
#rename seurat_clusters
sk_module=pd.read_csv('/Users/menghan/Documents/projects/skeletonConvergence/results/reslt_scRNA/pseudotime/scrnaSub_nasal_skModule.txt',sep='\t')
adata.obs['seurat_clusters']=sk_module['seurat_clusters'].values
adata.obs["seurat_clusters"] = adata.obs["seurat_clusters"].astype('category')
scv.pl.proportions(adata, groupby="seurat_clusters")
scv.pl.scatter(adata, basis="xtsne_cell_embeddings", color='seurat_clusters')
print(adata.obs['seurat_clusters'].cat.categories)
recolor=["#65000B","#FC89AC","#FFC0CB","#DC0078","#B0445C","#FCAACF"]
adata.uns['seurat_clusters_colors']=recolor
scv.pl.scatter(adata, basis="xtsne_cell_embeddings", color='seurat_clusters')
## run scVelo
scv.pp.filter_and_normalize(adata, min_shared_counts=20, n_top_genes=2000)
sc.tl.pca(adata)
sc.pp.neighbors(adata, n_pcs=30, n_neighbors=30)
scv.pp.moments(adata, n_pcs=30, n_neighbors=30)
scv.tl.recover_dynamics(adata)
scv.tl.velocity(adata, mode='dynamical')
scv.tl.velocity_graph(adata)
scv.pl.velocity_embedding_stream(adata, basis='xtsne_cell_embeddings', color='seurat_clusters',legend_fontsize=8, smooth=.8, min_mass=3,
title='', size=20, figsize=(4,4), linewidth=0.4,
dpi=120,save="figure1_i")
#error: https://github.com/theislab/scvelo/issues/279
#plt.savefig('/Users/menghan/Documents/projects/skeletonConvergence/manuscript/figures/figure1_formal/figure1_I-0.pdf',dpi=300)
from matplotlib import cm
from matplotlib.colors import ListedColormap, LinearSegmentedColormap
#https://matplotlib.org/stable/tutorials/colors/colormaps.html
#https://matplotlib.org/3.1.0/tutorials/colors/colormap-manipulation.html
def plot_examples(cms):
"""
helper function to plot two colormaps
"""
np.random.seed(19680801)
data = np.random.randn(30, 30)
fig, axs = plt.subplots(1, 2, figsize=(6, 3), constrained_layout=True)
for [ax, cmap] in zip(axs, cms):
psm = ax.pcolormesh(data, cmap=cmap, rasterized=True, vmin=-3, vmax=3)
fig.colorbar(psm, ax=ax)
plt.show()
top = cm.get_cmap('gray', 256)
bottom = cm.get_cmap('YlOrRd', 256)
gray1 = np.array([211/256, 211/256, 211/256, 1])
gray2 = np.array([189/256, 189/256, 189/256, 1])
gray3 = np.array([126/256, 126/256, 126/256, 1])
newcolors = np.vstack((top(np.linspace(0.5, 1, 84)),
bottom(np.linspace(0, 1, 172))))
#from colormap import hex2rgb
#print(hex2rgb('#d3d3d3'));print(hex2rgb('#bdbdbd'));print(hex2rgb('#7e7e7e'))
#newcolors[:28, :] = gray3; newcolors[:42, :] = gray2; newcolors[42:84, :] = gray1
newcmp = ListedColormap(newcolors, name='GrayRed')
plot_examples([newcmp])
#check if SOX9 in the variable genes
print('ENSGALG00000004386' in adata.var_names)
sox9=pd.read_csv('/Users/menghan/Documents/projects/skeletonConvergence/results/reslt_scRNA/pseudotime/scrnaSub_nasal_sox9Exp.txt',sep='\t')
adata.obs['SOX9_RNA']=sox9['x'].values
#if not in adata.var_names, then cannot plot
#use SOX9 expression as color
#sc.pl.scatter(adata, basis="xtsne_cell_embeddings", color='ENSGALG00000004386', color_map=newcmp)
scv.pl.velocity_embedding_stream(adata, basis='xtsne_cell_embeddings', color='SOX9_RNA', color_map=newcmp,
legend_fontsize=8, smooth=.8, min_mass=3,
title='', size=20, figsize=(4,4), linewidth=0.4, dpi=120,save="figure1_i_sox9")
adata.obs['skeletal_Module']=sk_module['skeletal_Features1'].values
#sc.pl.scatter(adata, basis="xtsne_cell_embeddings", color='ENSGALG00000004386', color_map=newcmp)
scv.pl.velocity_embedding_stream(adata, basis='xtsne_cell_embeddings', color='skeletal_Module', color_map=newcmp,
legend_fontsize=8, smooth=.8, min_mass=3,
title='', size=20, figsize=(4,4), linewidth=0.4, dpi=120,save="figure1_i_skModule")
#https://cellrank.readthedocs.io/en/latest/kernels_and_estimators.html#Infer-terminal-states
#Combining two kernels
#RNA velocity vectors can sometimes be very noisy - to regularize them, let’s combine the VelocityKernel with a ConnectivityKernel, computed on the basis of transcriptomic similarity
vk = VelocityKernel(adata)
vk.compute_transition_matrix()
print(vk)
from cellrank.tl.kernels import ConnectivityKernel
ck = ConnectivityKernel(adata).compute_transition_matrix()
combined_kernel = 0.8 * vk + 0.2 * ck
print(combined_kernel)
g = GPCCA(combined_kernel)
print(g)
g.compute_schur(n_components=20)
#Above, we plotted the top 20 eigenvalues of the matrix T to see whether there is an apparent eigengap.
g.plot_spectrum()
g.plot_schur(use=4, basis='xtsne_cell_embeddings')
#Infer terminal states
#The next step in GPCCA is to find a linear combination of these vectors such that the Markov chain defined on the subset of states has large self-transition probability
g.compute_macrostates(n_states=4, cluster_key='seurat_clusters')
g.plot_macrostates(basis='xtsne_cell_embeddings')
g.plot_macrostates(same_plot=False,basis='xtsne_cell_embeddings')
g.plot_macrostates(discrete=True,basis='xtsne_cell_embeddings')
# manually restrict the macrostates by passing a list of macrostate-names that you know are terminal in your data.
g.set_terminal_states_from_macrostates(['1','3','5'])
cr.pl.terminal_states(adata,basis="xtsne_cell_embeddings", dpi=150)
#Estimate fate probabilities
g.compute_absorption_probabilities()
g.absorption_probabilities
g.plot_absorption_probabilities(basis="xtsne_cell_embeddings")
g.plot_absorption_probabilities(same_plot=False,basis="xtsne_cell_embeddings", lineages=['3'], show_dp=False)
g.absorption_probabilities
#save
adata.obs.to_csv('/Users/menghan/Documents/projects/skeletonConvergence/processedData/procss_scRNA/scrnaSub_nasal_cellrank_obs.txt')
adata.var.to_csv('/Users/menghan/Documents/projects/skeletonConvergence/processedData/procss_scRNA/scrnaSub_nasal_cellrank_var.txt')
adata.write('/Users/menghan/Documents/projects/skeletonConvergence/processedData/procss_scRNA/scrnaSub_nasal_cellrank.hdf5')
adata=sc.read_loom("/Users/menghan/Documents/projects/skeletonConvergence/results/reslt_scRNA/pseudotime/scrnaSub_somite.loom")
ldata = scv.read("/Users/menghan/Documents/projects/skeletonConvergence/results/reslt_scRNA/pseudotime/velocity_9samples.loom", cache=True)
#remove W chr genes
wolp=adata.var_names.intersection(wgenes['ensembl_gene_id'])
print(wolp)
w_gene_indicator = np.in1d(adata.var_names, wgenes['ensembl_gene_id'])
adata = adata[:, ~w_gene_indicator]
adata_loom = ldata[:, adata.var_names]
adata = scv.utils.merge(adata, adata_loom); del ldata
adata.obs["seurat_clusters"] = adata.obs["seurat_clusters"].astype('category')
scv.pl.proportions(adata, groupby="seurat_clusters")
scv.pl.scatter(adata, basis="xtsne_cell_embeddings", color='seurat_clusters')
print(adata.obs['seurat_clusters'].cat.categories)
recolor=["#8BD6FF","#008FD4","#00416A","#00CCFF"]
adata.uns['seurat_clusters_colors']=recolor
scv.pl.scatter(adata, basis="xtsne_cell_embeddings", color='seurat_clusters')
## run scVelo
scv.pp.filter_and_normalize(adata, min_shared_counts=20, n_top_genes=2000)
sc.tl.pca(adata)
sc.pp.neighbors(adata, n_pcs=30, n_neighbors=30)
scv.pp.moments(adata, n_pcs=30, n_neighbors=30)
scv.tl.recover_dynamics(adata)
scv.tl.velocity(adata, mode='dynamical')
scv.tl.velocity_graph(adata)
scv.pl.velocity_embedding_stream(adata, basis='xtsne_cell_embeddings', color='seurat_clusters',legend_fontsize=8, smooth=.8, min_mass=3,
title='', size=20, figsize=(4,4), linewidth=0.4,
dpi=120,save="figure1_j")
#error: https://github.com/theislab/scvelo/issues/279
#plt.savefig('/Users/menghan/Documents/projects/skeletonConvergence/manuscript/figures/figure1_formal/figure1_J-0.pdf',dpi=300)
#check if SOX9 in the variable genes
print('ENSGALG00000004386' in adata.var_names)
sox9=pd.read_csv('/Users/menghan/Documents/projects/skeletonConvergence/results/reslt_scRNA/pseudotime/scrnaSub_somite_sox9Exp.txt',sep='\t')
adata.obs['SOX9_RNA']=sox9['x'].values
#if not in adata.var_names, then cannot plot
#use SOX9 expression as color
#sc.pl.scatter(adata, basis="xtsne_cell_embeddings", color='ENSGALG00000004386', color_map=newcmp)
scv.pl.velocity_embedding_stream(adata, basis='xtsne_cell_embeddings', color='SOX9_RNA', color_map=newcmp,
legend_fontsize=8, smooth=.8, min_mass=3,
title='', size=20, figsize=(4,4), linewidth=0.4, dpi=120,save="figure1_j_sox9")
sk_module=pd.read_csv('/Users/menghan/Documents/projects/skeletonConvergence/results/reslt_scRNA/pseudotime/scrnaSub_somite_skModule.txt',sep='\t')
adata.obs['skeletal_Module']=sk_module['skeletal_Features1'].values
#if not in adata.var_names, then cannot plot
#use SOX9 expression as color
#sc.pl.scatter(adata, basis="xtsne_cell_embeddings", color='ENSGALG00000004386', color_map=newcmp)
scv.pl.velocity_embedding_stream(adata, basis='xtsne_cell_embeddings', color='skeletal_Module', color_map=newcmp,
legend_fontsize=8, smooth=.8, min_mass=3,
title='', size=20, figsize=(4,4), linewidth=0.4, dpi=120,save="figure1_j_skModule")
#Combining two kernels
#RNA velocity vectors can sometimes be very noisy - to regularize them, let’s combine the VelocityKernel with a ConnectivityKernel, computed on the basis of transcriptomic similarity
vk = VelocityKernel(adata)
vk.compute_transition_matrix()
print(vk)
from cellrank.tl.kernels import ConnectivityKernel
ck = ConnectivityKernel(adata).compute_transition_matrix()
combined_kernel = 0.8 * vk + 0.2 * ck
print(combined_kernel)
g = GPCCA(combined_kernel)
print(g)
g.compute_schur(n_components=20)
#Above, we plotted the top 20 eigenvalues of the matrix T to see whether there is an apparent eigengap.
g.plot_spectrum()
g.plot_schur(use=3, basis='xtsne_cell_embeddings')
#Infer terminal states
#The next step in GPCCA is to find a linear combination of these vectors such that the Markov chain defined on the subset of states has large self-transition probability
g.compute_macrostates(n_states=3, cluster_key='seurat_clusters')
g.plot_macrostates(basis='xtsne_cell_embeddings')
g.plot_macrostates(same_plot=False,basis='xtsne_cell_embeddings')
g.plot_macrostates(discrete=True,basis='xtsne_cell_embeddings')
# manually restrict the macrostates by passing a list of macrostate-names that you know are terminal in your data.
g.set_terminal_states_from_macrostates(['2','1'])
cr.pl.terminal_states(adata,basis="xtsne_cell_embeddings", dpi=150)
#Estimate fate probabilities
g.compute_absorption_probabilities()
g.absorption_probabilities
g.plot_absorption_probabilities(basis="xtsne_cell_embeddings")
g.plot_absorption_probabilities(same_plot=False,basis="xtsne_cell_embeddings", lineages=['2'], show_dp=False)
g.absorption_probabilities
#save
adata.obs.to_csv('/Users/menghan/Documents/projects/skeletonConvergence/processedData/procss_scRNA/scrnaSub_somite_cellrank_obs.txt')
adata.var.to_csv('/Users/menghan/Documents/projects/skeletonConvergence/processedData/procss_scRNA/scrnaSub_somite_cellrank_var.txt')
adata.write('/Users/menghan/Documents/projects/skeletonConvergence/processedData/procss_scRNA/scrnaSub_somite_cellrank.hdf5')
adata=sc.read_loom("/Users/menghan/Documents/projects/skeletonConvergence/results/reslt_scRNA/pseudotime/scrnaSub_limb.loom")
ldata = scv.read("/Users/menghan/Documents/projects/skeletonConvergence/results/reslt_scRNA/pseudotime/velocity_9samples.loom", cache=True)
#remove W chr genes
wolp=adata.var_names.intersection(wgenes['ensembl_gene_id'])
print(wolp)
w_gene_indicator = np.in1d(adata.var_names, wgenes['ensembl_gene_id'])
adata = adata[:, ~w_gene_indicator]
adata_loom = ldata[:, adata.var_names]
adata = scv.utils.merge(adata, adata_loom); del ldata
adata.obs["seurat_clusters"] = adata.obs["seurat_clusters"].astype('category')
scv.pl.proportions(adata, groupby="seurat_clusters")
scv.pl.scatter(adata, basis="xtsne_cell_embeddings", color='seurat_clusters')
print(adata.obs['seurat_clusters'].cat.categories)
recolor=["#FFE78A","#FFB817","#662D0B","#FFD01F","#FFA309","#C49606","#88540B"]
adata.uns['seurat_clusters_colors']=recolor
scv.pl.scatter(adata, basis="xtsne_cell_embeddings", color='seurat_clusters')
## run scVelo
scv.pp.filter_and_normalize(adata, min_shared_counts=20, n_top_genes=2000)
sc.tl.pca(adata)
sc.pp.neighbors(adata, n_pcs=30, n_neighbors=30)
scv.pp.moments(adata, n_pcs=30, n_neighbors=30)
scv.tl.recover_dynamics(adata)
scv.tl.velocity(adata, mode='dynamical')
scv.tl.velocity_graph(adata)
scv.pl.velocity_embedding_stream(adata, basis='xtsne_cell_embeddings', color='seurat_clusters',legend_fontsize=8, smooth=.8, min_mass=3,
title='', size=20, figsize=(4,4), linewidth=0.4,
dpi=120,save="figure1_k")
#error: https://github.com/theislab/scvelo/issues/279
#plt.savefig('/Users/menghan/Documents/projects/skeletonConvergence/manuscript/figures/figure1_formal/figure1_K-0.pdf',dpi=300)
#check if SOX9 in the variable genes
print('ENSGALG00000004386' in adata.var_names)
sox9=pd.read_csv('/Users/menghan/Documents/projects/skeletonConvergence/results/reslt_scRNA/pseudotime/scrnaSub_limb_sox9Exp.txt',sep='\t')
adata.obs['SOX9_RNA']=sox9['x'].values
#if not in adata.var_names, then cannot plot
#use SOX9 expression as color
#sc.pl.scatter(adata, basis="xtsne_cell_embeddings", color='ENSGALG00000004386', color_map=newcmp)
scv.pl.velocity_embedding_stream(adata, basis='xtsne_cell_embeddings', color='SOX9_RNA', color_map=newcmp,
legend_fontsize=8, smooth=.8, min_mass=3,
title='', size=20, figsize=(4,4), linewidth=0.4, dpi=120,save="figure1_k_sox9")
sk_module=pd.read_csv('/Users/menghan/Documents/projects/skeletonConvergence/results/reslt_scRNA/pseudotime/scrnaSub_limb_skModule.txt',sep='\t')
adata.obs['skeletal_Module']=sk_module['skeletal_Features1'].values
#sc.pl.scatter(adata, basis="xtsne_cell_embeddings", color='ENSGALG00000004386', color_map=newcmp)
scv.pl.velocity_embedding_stream(adata, basis='xtsne_cell_embeddings', color='skeletal_Module', color_map=newcmp,
legend_fontsize=8, smooth=.8, min_mass=3,
title='', size=20, figsize=(4,4), linewidth=0.4, dpi=120,save="figure1_k_skModule")
#Combining two kernels
#RNA velocity vectors can sometimes be very noisy - to regularize them, let’s combine the VelocityKernel with a ConnectivityKernel, computed on the basis of transcriptomic similarity
vk = VelocityKernel(adata)
vk.compute_transition_matrix()
print(vk)
from cellrank.tl.kernels import ConnectivityKernel
ck = ConnectivityKernel(adata).compute_transition_matrix()
combined_kernel = 0.8 * vk + 0.2 * ck
print(combined_kernel)
g = GPCCA(combined_kernel)
print(g)
g.compute_schur(n_components=20)
#Above, we plotted the top 20 eigenvalues of the matrix T to see whether there is an apparent eigengap.
g.plot_spectrum()
g.plot_schur(use=3, basis='xtsne_cell_embeddings')
#Infer terminal states
#The next step in GPCCA is to find a linear combination of these vectors such that the Markov chain defined on the subset of states has large self-transition probability
g.compute_macrostates(n_states=3, cluster_key='seurat_clusters')
g.plot_macrostates(basis='xtsne_cell_embeddings')
g.plot_macrostates(same_plot=False,basis='xtsne_cell_embeddings')
g.plot_macrostates(discrete=True,basis='xtsne_cell_embeddings')
# manually restrict the macrostates by passing a list of macrostate-names that you know are terminal in your data.
g.set_terminal_states_from_macrostates(['5','6','1'])
#g.set_terminal_states(['5','3','2'])
cr.pl.terminal_states(adata,basis="xtsne_cell_embeddings", dpi=150)
#Estimate fate probabilities
g.compute_absorption_probabilities()
g.absorption_probabilities
g.plot_absorption_probabilities(basis="xtsne_cell_embeddings")
g.plot_absorption_probabilities(same_plot=False,basis="xtsne_cell_embeddings", lineages=['5'], show_dp=False)
g.absorption_probabilities
#save
adata.obs.to_csv('/Users/menghan/Documents/projects/skeletonConvergence/processedData/procss_scRNA/scrnaSub_limb_cellrank_obs.txt')
adata.var.to_csv('/Users/menghan/Documents/projects/skeletonConvergence/processedData/procss_scRNA/scrnaSub_limb_cellrank_var.txt')
adata.write('/Users/menghan/Documents/projects/skeletonConvergence/processedData/procss_scRNA/scrnaSub_limb_cellrank.hdf5')